rm(list=ls())
library(rstanarm)
library(plyr)
library(arm)
library(foreign)
library(car)
library(reshape2)
library(data.table)
library(rstan)

megapoll <- read.dta("gay_marriage_megapoll_updated.dta")#read in polling data
#bring in poststrat data--using one I designed for years
poststrats.combined.wbh <- read.dta("poststrats_combined_wbh.dta", 	convert.underscore = FALSE)
#keep only years within range of megapoll
foo <- range(megapoll$year)
poststrats.combined.wbh <- subset(poststrats.combined.wbh, year>=foo[1] & year<=foo[2])

#merge presvote and p.relig
foo1 <- read.dta('pres_vote_interpolated_1996_2015.dta')
poststrats.combined.wbh <- join(poststrats.combined.wbh,foo1)
foo2 <- read.dta('prop_relig.dta')
poststrats.combined.wbh <- join(poststrats.combined.wbh,foo2)
poststrats.combined.wbh <- arrange(poststrats.combined.wbh, year, state)

#confirm it worked
with(foo1, table(dempresvote[state=="AR" & year==2002]))
with(poststrats.combined.wbh, table(dempresvote[state=="AR" & year==2002]))
with(foo2, table(p_relig[state=="AR"]))
with(poststrats.combined.wbh, table(p_relig[state=="AR"]))

#need to recover year trends and squares from poll data, since using arm:rescale (which depends on the data\)
megapoll$year_trend <- megapoll$year-min(megapoll$year)+1
megapoll$year_trend_squared <- megapoll$year_trend*megapoll$year_trend
megapoll$year_trend_rescaled <- arm::rescale(megapoll$year_trend)
megapoll$year_trend_squared_rescaled <- megapoll$year_trend_rescaled^2
temp.mean <- mean(megapoll$year_trend)
temp.sd <- sd(megapoll$year_trend)
poststrats.combined.wbh$year_trend <- poststrats.combined.wbh$year-min(megapoll$year)+1
poststrats.combined.wbh$year_trend_rescaled  <- (poststrats.combined.wbh$year_trend-temp.mean) / (2*temp.sd)
poststrats.combined.wbh$year_trend_squared_rescaled <- poststrats.combined.wbh$year_trend_rescaled^2
#confirm it worked
with(megapoll, table(year_trend_rescaled[year==1994]))
with(poststrats.combined.wbh, table(year_trend_rescaled[year==1994]))
with(megapoll, table(year_trend_squared_rescaled[year==1994]))
with(poststrats.combined.wbh, table(year_trend_squared_rescaled[year==1994]))

#LOAD STAN OUTPUT
load('gay_mar_STAN_quadratic_trend_by_state.Rdata')
#confirm model
stan.results <- gay.mar.stan.model
stan.results$formula
summary(stan.results, digits=3)
# par(mar=c(0,0,0,0))
# stan_plot(stan.results, pars=c("(Intercept)","log-posterior") , include=FALSE)
# stan_trace(stan.results, pars="(Intercept)", include=FALSE)
# stan_rhat(stan.results)
# stan_ess(stan.results)
n.eff <- stan.results$stan_summary[,"n_eff"]
rhats <- summary(stan.results)[,"Rhat"]
range(n.eff)
range(rhats)

megapoll <- stan.results$data
fixefs.stan <- fixef(stan.results)
ranefs.stan <-ranef(stan.results)

stan.pred.values.coefs.me <- invlogit (
  #Fixed effects
  fixefs.stan["(Intercept)"] +
    fixefs.stan["year_trend_rescaled"]* megapoll$year_trend_rescaled +
    fixefs.stan["year_trend_squared_rescaled"]* megapoll$year_trend_squared_rescaled +
    fixefs.stan["dempresvote"]* megapoll$dempresvote +
    fixefs.stan["p_relig"] * megapoll$p_relig +
    #by state
    ranefs.stan$state[,"(Intercept)"][megapoll$state] +
    ranefs.stan$state[,"year_trend_rescaled"][megapoll$state] * megapoll$year_trend_rescaled +
    ranefs.stan$state[,"year_trend_squared_rescaled"][megapoll$state] * megapoll$year_trend_squared_rescaled +
    #non-varying
    ranefs.stan$female_race_wbh[,1][megapoll$female_race_wbh]	 +
    ranefs.stan$edu[,1][megapoll$edu]	+
    ranefs.stan$age[,1][megapoll$age]
)

par(mfrow=c(2,1))


#NOW DO FULL STAN RESULTS FOR POSTSTRAT
#NOW WORK WITH MODEL RESULTS
fit_ss <- as.data.frame(stan.results)
#fit_ss <- fit_ss[1:10,]

#pull out lists into separate data frames (matrices)
n.draws <- nrow(fit_ss)
#pull out sets of coefficients and rename
#pull out sets of coefficients and rename
#fixefs
b_intercept <- fit_ss["(Intercept)"]
b_year_trend_rescaled <- fit_ss["year_trend_rescaled"]
b_year_trend_squared_rescaled <- fit_ss["year_trend_squared_rescaled"]
b_state_pred <- fit_ss["dempresvote"]
b_relig <- fit_ss["p_relig"]
#varying intercepts
b_female_race_wbh_intercepts <- fit_ss[  grep("female", names(fit_ss))]
names(b_female_race_wbh_intercepts ) <-  substring(names(b_female_race_wbh_intercepts ), 31, nchar(names(b_female_race_wbh_intercepts ))-1) 
names(b_female_race_wbh_intercepts) <- gsub("_", " ",names(b_female_race_wbh_intercepts))
b_edu_intercepts <- fit_ss[  grep("edu", names(fit_ss))]
names(b_edu_intercepts) <- substring(names(b_edu_intercepts), 19,20)
b_age_intercepts <- fit_ss[  grep("age", names(fit_ss))]
names(b_age_intercepts) <- substring(names(b_age_intercepts), 19,20)

#need to pull out different varying slopes
b_state_intercepts <- fit_ss[ intersect( grep("Intercept", names(fit_ss)), grep("state", names(fit_ss))) ]
names(b_state_intercepts) <- substring(names(b_state_intercepts), 21,22)
b_state_year_slopes <- fit_ss[ intersect( grep("year_trend_rescaled", names(fit_ss)), grep("state", names(fit_ss))) ]
names(b_state_year_slopes ) <- substring(names(b_state_year_slopes), 29,30)
b_state_year_squared_slopes <- fit_ss[ intersect( grep("year_trend_squared_rescaled", names(fit_ss)), grep("state", names(fit_ss))) ]
names(b_state_year_squared_slopes ) <- substring(names(b_state_year_squared_slopes), 37,38)

#confirm all names look right
names(b_state_intercepts)
names(b_state_year_slopes)
names(b_state_year_squared_slopes)
names(b_female_race_wbh_intercepts)
names(b_edu_intercepts) 
names(b_age_intercepts) 


#for each cell of poststrat, loop to get correct esitmates
L <- nrow(poststrats.combined.wbh)
poststrat.estimates <-  array(	NA, c(L,n.draws))

for (i in 1:n.draws){#loop over sims
  poststrat.estimates[,i] <- 
    unlist(b_intercept)[i] +  #overall intercept
    unlist(b_year_trend_rescaled)[i]*poststrats.combined.wbh$year_trend_rescaled + #overall trend
    unlist(b_year_trend_squared_rescaled)[i]*poststrats.combined.wbh$year_trend_squared_rescaled + #overall trend
    unlist(b_state_pred)[i]*poststrats.combined.wbh$dempresvote + #state predictor
    unlist(b_relig)[i]*poststrats.combined.wbh$p_relig + 		# catholics
    #BY STATE
    unlist(b_state_intercepts[i,]) [match( poststrats.combined.wbh$state, names(unlist(b_state_intercepts[i,])))] + 
    unlist(b_state_year_slopes[i,]) [match( poststrats.combined.wbh$state, names(unlist(b_state_year_slopes[i,])))]	*poststrats.combined.wbh$year_trend_rescaled +
    unlist(b_state_year_squared_slopes[i,]) [match( poststrats.combined.wbh$state, names(unlist(b_state_year_squared_slopes[i,])))]	*poststrats.combined.wbh$year_trend_squared_rescaled +
    #NON-VARYING demographics
    unlist(b_female_race_wbh_intercepts[i,]) [match( poststrats.combined.wbh$female_race_wbh, names(unlist(b_female_race_wbh_intercepts[i,])))] +
    unlist(b_edu_intercepts[i,]) [match( poststrats.combined.wbh$edu, names(unlist(b_edu_intercepts[i,])))] + 
    unlist(b_age_intercepts[i,]) [match( poststrats.combined.wbh$age, names(unlist(b_age_intercepts[i,])))] 
}
poststrat.estimates.foo	<- 100*invlogit(poststrat.estimates)

#now cbind estimates to poststrat
poststrat.estimates.full <- cbind(poststrats.combined.wbh, poststrat.estimates.foo)
#drop cats
drops <- c("age", "edu", "female_race_wbh", "cw_state_lib", "race_wbh", "female", "freq", "cfreq_state", "year_trend", 
           "year_trend_rescaled", "cath_adherents_prop","circuit", "dempresvote", "p_relig", "cpercent_national", "cfreq_national" )
poststrat.estimates.foo2 <- poststrat.estimates.full[ , !(names(poststrat.estimates.full) %in% drops)]                                                                                                                      

#now get estimates for each at level of state/yearr
dt <- as.data.table(poststrat.estimates.foo2)
state.pred <- dt[,lapply(.SD,weighted.mean,w=cpercent_state),by=list(state,year)]#same as ddply 
state.pred <- as.data.frame(state.pred)

#add X to sim names
start.col <- which(names(state.pred)=="1")
names(state.pred)[start.col:ncol(state.pred)] <- 	paste0("X",names(state.pred)[start.col:ncol(state.pred)])#add X's to sim names
#confirm it worked
ok <- with(poststrat.estimates.foo2, state=="NY" & year==1998)
with(poststrat.estimates.foo2, weighted.mean(poststrat.estimates.foo2[,start.col][ state=="NY" & year==1998], cpercent_state[ state=="NY" & year==1998]))
with(state.pred,X1[ state=="NY" & year==1998])
state.pred$cpercent_state <- NULL

##returns a matrix of issue/state/year x 1,000 sims
write.csv(state.pred, "state_preds_all_sims_from_STAN.csv", row.names=FALSE)

#now take means and quantiles aross sims -- use rowmeans / apply
temp_df <- state.pred[sapply(state.pred,is.numeric)]#pull out sims
temp_df$year <- NULL
temp_df$year_trend <- NULL
mrp.trend.median  <- apply(temp_df, 1,median)
mrp.mean  <- apply(temp_df, 1,mean)
mrp.trend.lower <-  apply(temp_df, 1, quantile, probs=.025)
mrp.trend.upper <-  apply(temp_df, 1, quantile, probs=.975)
cor(mrp.trend.median, mrp.mean  )
plot(mrp.trend.median, mrp.mean )
#create data frame to export
foo <- data.frame(state=state.pred[,"state"],year=state.pred[,"year"], mrp.trend.lower ,mrp.trend.median ,mrp.mean ,mrp.trend.upper )
#Write out to use later
write.csv(foo, "state_preds_means_quantiles_from_STAN.csv", row.names=FALSE)


##########################
#now do by circuit
##########################
#now cbind estimates to poststrat
drops <- c("age", "edu", "female_race_wbh", "cw_state_lib", "race_wbh", "female", "freq", "cfreq_state", "year_trend", 
           "year_trend_rescaled", "cath_adherents_prop","state" ,"dempresvote", "p_relig", "cpercent_national", "cfreq_national" )
poststrat.estimates.foo2 <- poststrat.estimates.full[ , !(names(poststrat.estimates.full) %in% drops)]                                                                                                                      

#now get estimates for each at level of state/year/issue
dt <- as.data.table(poststrat.estimates.foo2)
circuit.pred <- dt[,lapply(.SD,weighted.mean,w=cpercent_state),by=list(circuit,year)]#same as ddply 
circuit.pred <- as.data.frame(circuit.pred)

#add X to sim names
start.col <- which(names(circuit.pred)=="1")
names(circuit.pred)[start.col:ncol(circuit.pred)] <- 	paste0("X",names(circuit.pred)[start.col:ncol(circuit.pred)])#add X's to sim names
#confirm it worked
ok <- with(poststrat.estimates.foo2, circuit==9 & year==1998 )
with(poststrat.estimates.foo2, weighted.mean(poststrat.estimates.foo2[,start.col][ circuit==9 & year==1998 ], cpercent_state[ circuit==9 		& year==1998 ]))
with(circuit.pred,X1[ circuit==9 & year==1998 ])
circuit.pred$cpercent_state <- NULL

##returns a matrix of issue/state/year x 1,000 sims
circuit.pred <- arrange(circuit.pred, circuit, year)
write.csv(circuit.pred, "circuit_preds_all_sims_from_STAN.csv",  row.names=FALSE)

#now take means and quantiles aross sims -- use rowmeans / apply
temp_df <- circuit.pred[sapply(circuit.pred,is.numeric)]#pull out sims
temp_df$year <- NULL
temp_df$year_trend <- NULL
mrp.trend.median  <- apply(temp_df, 1,median)
mrp.trend.mean  <- apply(temp_df, 1,mean)
mrp.trend.lower <-  apply(temp_df, 1, quantile, probs=.025)
mrp.trend.upper <-  apply(temp_df, 1, quantile, probs=.975)
cor(mrp.trend.median, mrp.trend.mean  )
plot(mrp.trend.median, mrp.trend.mean )
#create data frame to export
foo <- data.frame(circuit=circuit.pred[,"circuit"],year=circuit.pred[,"year"], mrp.trend.lower ,mrp.trend.median ,mrp.trend.mean ,mrp.trend.upper )
#Write out to use later
write.csv(foo, "circuit_preds_means_quantiles_from_STAN.csv", row.names=FALSE)

# ############################################################################################
# #now do national-level estimates
# #merge national weights
drops <- c("age", "edu", "female_race_wbh", "cw_state_lib", "race_wbh", "female", "freq", "cfreq_state", "year_trend", 
           "year_trend_rescaled", "cath_adherents_prop","state", "cfreq_national", "cpercent_state", "circuit","dempresvote", "p_relig" )
poststrat.estimates.foo2 <- poststrat.estimates.full[ , !(names(poststrat.estimates.full) %in% drops)]      

#now get estimates for each at level of national/year/issue
dt <- as.data.table(poststrat.estimates.foo2)
national.pred <- dt[,lapply(.SD,weighted.mean,w=cpercent_national),by=list(year)] 
national.pred <- as.data.frame(national.pred)

#add X to sim names
start.col <- which(names(national.pred)=="1")
names(national.pred)[start.col:ncol(national.pred)] <- 	paste0("X",names(national.pred)[start.col:ncol(national.pred)])#add X's to sim names
#confirm it worked
ok <- with(poststrat.estimates.foo2, year==1998 )
with(poststrat.estimates.foo2, weighted.mean(poststrat.estimates.foo2[,start.col][ year==1998 ], 	cpercent_national[ year==1998 ]))
with(national.pred,X1[ year==1998 ])
national.pred$cpercent_national <- NULL

##returns a matrix of issue/state/year x 1,000 sims
write.csv(national.pred, "national_preds_all_sims_from_STAN.csv", row.names=FALSE)

#now take means and quantiles aross sims -- use rowmeans / apply
temp_df <- national.pred[sapply(national.pred,is.numeric)]#pull out sims
temp_df$year <- NULL
temp_df$year_trend <- NULL
mrp.trend.median  <- apply(temp_df, 1,median)
mrp.trend.mean  <- apply(temp_df, 1,mean)
mrp.trend.lower <-  apply(temp_df, 1, quantile, probs=.025)
mrp.trend.upper <-  apply(temp_df, 1, quantile, probs=.975)
cor(mrp.trend.median, mrp.trend.mean  )
plot(mrp.trend.median, mrp.trend.mean )
#create data frame to export
foo <- data.frame(year=national.pred[,"year"], mrp.trend.lower ,mrp.trend.median ,mrp.trend.mean ,mrp.trend.upper )
foo <- arrange(foo, year)
#Write out to use later
write.csv(foo, "national_preds_means_quantiles_from_STAN.csv", row.names=FALSE)



